/* Construct real values for the FHS industries. 
   First merge the industry samples based on product trailer data
   with the variables from the CMF base files.  
   Then merge the NBER-CES deflators, the capital stock 
   variables constructed from BEA and BLS capital data, the 
   entry/exit flags created in Create_entry_exit_flags.sas,
   and the industry cost shares.

	input files: 
		fhs.sugarf2002
		fhs.sugarf2007
		fhs.icef2002
		fhs.icef2007
		fhs.gasf2002
		fhs.gasf2007
		fhs.floorf2002
		fhs.floorf2007
		fhs.carbonf2002
		fhs.carbonf2007
		fhs.plywoodf2002
		fhs.plywoodf2007
		fhs.coffeef2002
		fhs.coffeef2007
		fhs.breadf2002
		fhs.breadf2007
		fhs.boxesf2002
		fhs.boxesf2007
		fhs.concf2002
		fhs.concf2007
		cmf.cmf2002
		cmf.cmf2007
		nberces.naics5809
		bea.bea_naics
		bls.BLS_capital_FHS
		fhs.bcd0207
		indcs.cost_shares_FHS

	output files:
		fhs.physugarf
		fhs.phyicef
		fhs.phygasf
		fhs.phyfloorf
		fhs.phycarbonf
		fhs.phyplywoodf
		fhs.phycoffeef
		fhs.phybreadf
		fhs.phyboxesf
		fhs.phyconcf

*/

/* OPTIONS OBS=500  NOREPLACE  ; */ 

%include "ASMimplibs.sas";


*** PART 1: SELECT PLANTS FROM INITIAL INDUSTRY DATASETS AND MERGE WITH CMF;

%macro getind(i,y);

   /* Attach the CMF base-file variables to the plants of one FHS
      industry sample for one census year.
        i = industry sample name (e.g. sugarf)
        y = census year (2002 or 2007)
      Reads  fhs.<i><y> and cmf.cmf<y>; writes WORK.<i><y>, which holds
      only the plants present in the industry sample.
      The CMF file is merged as stored, so it is assumed to be sorted
      by survu_id already. */

   proc sort data=fhs.&i.&y out=&i.&y;
      by survu_id;
   run;

   data &i.&y;
      merge &i.&y     (in=infinal)
            cmf.cmf&y (keep=survu_id firmid year NAICS_NEW
                            tvs tib tie ph sw ww tab tae ee cf cp cm pw te
                            tvs_f ph_f ww_f tab_f tae_f ee_f cf_f cp_f cm_f pw_f te_f);
      by survu_id;
      /* Drop CMF-only plants. */
      if not infinal then delete;
   run;

%mend getind;

/* One call per industry and census year; the calls are independent of
   one another, so they are grouped by industry here. */
%getind(sugarf,2002);     %getind(sugarf,2007);
%getind(icef,2002);       %getind(icef,2007);
%getind(plywoodf,2002);   %getind(plywoodf,2007);
%getind(gasf,2002);       %getind(gasf,2007);
%getind(floorf,2002);     %getind(floorf,2007);
%getind(coffeef,2002);    %getind(coffeef,2007);
%getind(carbonf,2002);    %getind(carbonf,2007);
%getind(breadf,2002);     %getind(breadf,2007);
%getind(boxesf,2002);     %getind(boxesf,2007);
%getind(concf,2002);      %getind(concf,2007);


/***
*** PART 2: Construct the real "flow" variables needed for TFP calculations
***/

/* Pull the NBER-CES price deflators for the two census years used here. */

data deflators (keep = naics year piship pimat pien piinv);
  set nberces.naics5809;
  if not (year in (2002, 2007)) then delete;
run;

/* Save each industry's 2002 index levels under the 02_base97 names.
   They serve as the denominators when the indexes are re-expressed
   relative to 2002. */

data deflators02 (keep = naics piship02_base97 pimat02_base97 pien02_base97 piinv02_base97);
  set deflators;
  if year ne 2002 then delete;

  array idx_in  {4} piship pimat pien piinv;
  array idx_b02 {4} piship02_base97 pimat02_base97 pien02_base97 piinv02_base97;
  do _k = 1 to 4;          /* _k is not kept in the output */
    idx_b02{_k} = idx_in{_k};
  end;
run;


/* Rename the index columns as read so that the rebased series can take
   over the plain names piship, pimat, pien and piinv. */
proc datasets library=work nolist;
  modify deflators;
    rename piship = piship_base97
           pimat  = pimat_base97
           pien   = pien_base97
           piinv  = piinv_base97;
run;

proc sort data=deflators02;
  by naics;
run;

proc sort data=deflators;
  by naics;
run;

proc contents data=deflators;
run;

/* Divide every index by its 2002 level for the same industry, and
   replace the numeric NAICS code with a zero-padded 6-character code.
   KEEP= is applied before RENAME=, so the numeric naics is dropped and
   naics_char is stored under the name naics. */
data deflators (keep   = naics_char year piship pimat pien piinv
                rename = (naics_char = naics));
  merge deflators02 deflators;
  by naics;

  piship = piship_base97 / piship02_base97;
  pimat  = pimat_base97  / pimat02_base97;
  pien   = pien_base97   / pien02_base97;
  piinv  = piinv_base97  / piinv02_base97;

  naics_char = put(naics, z6.);
run;

proc sort data=deflators;
  by naics year;
run;

%macro create_real_values(ind);

 /* Build the real "flow" variables for one stacked industry dataset.
      ind = name of the WORK dataset holding both census years
    Writes WORK.<ind>_real: the plants of <ind> with the base-2002
    deflators attached plus q, lnq, e, le, pw_wage, pwhours, lth, matq,
    lm and naics3. */

 /* Six-character industry code used as the deflator merge key. */
 data &ind;
  set &ind;
  naics = substr(naics_new,1,6);
 run;

 proc sort data=&ind;
  by naics year;
 run;

 /* Attach the deflators and derive the real values in the same pass.
    Every derived variable is assigned on each iteration and no merged
    variable is altered. */
 data &ind._real;
  merge &ind (in=infhs) deflators (in=incs);
  by naics year;
  if not infhs then delete;

  /* BEA and BLS capital data aggregate 311 and 312, so 312 is recoded. */
  naics3 = substr(naics,1,3);
  if naics3 = "312" then naics3 = "311";

  /* Output: deflated shipments.  Inventory changes could be added to the
     output measure later; this first pass uses shipments only. */
  q = tvs/piship;
  if q > 0 then lnq = log(q);
  else lnq = .;

  /* Energy: deflated cost of fuels plus purchased electricity. */
  e = sum(cf,ee)/pien;
  if e > 0 then le = log(e);
  else le = .;

  /* Labor: total payroll divided by the production-worker hourly wage. */
  if ph > 0 then pw_wage = ww/ph;
  else pw_wage = .;

  if pw_wage > 0 then pwhours = sw/pw_wage;
  else pwhours = .;

  if pwhours > 0 then lth = log(pwhours);
  else lth = .;

  /* Materials: energy costs are netted out only when cm exceeds them. */
  if cm > sum(cf,ee) then matq = sum(cm,-1*cf,-1*ee)/pimat;
  else matq = cm/pimat;

  if matq > 0 then lm = log(matq);
  else lm = .;
 run;

%mend create_real_values;


/* Stack the 2002 and 2007 plant-level datasets of each industry into one
   WORK dataset, then create its real flow variables. */

%macro stack_and_deflate(ind);

  data &ind;
    set &ind.2002 &ind.2007;
  run;

  %create_real_values(&ind);

%mend stack_and_deflate;

%stack_and_deflate(sugarf);
%stack_and_deflate(icef);
%stack_and_deflate(plywoodf);
%stack_and_deflate(gasf);
%stack_and_deflate(floorf);
%stack_and_deflate(coffeef);
%stack_and_deflate(carbonf);
%stack_and_deflate(breadf);
%stack_and_deflate(boxesf);
%stack_and_deflate(concf);


/***
**** PART 3: Merge in the BEA and BLS capital data and construct the capital stock variables ;
***/


 /* Keep the BEA capital rows for the NAICS groups covered by the FHS
    industries, census years 2002 and 2007 only. */
 data beacapital (keep = naics_bea naics_bea7 year nkcst gkhst);
   set bea.bea_naics;
   naics_bea7 = substr(naics_bea,1,7);
   if not (year in (2002,2007)) then delete;
   if not (naics_bea7 in ('311-312','321    ','322    ','324    ','325    ','327    '))
     then delete;
 run;

 /* The BEA capital data aggregate some of the NAICS3 categories, so a
    separate NAICS3 key is created; the combined 311-312 group is stored
    under 311. */
 data beacapital (keep = naics3 year nkcst gkhst);
   set beacapital;
   if naics_bea7 = "311-312" then naics3 = "311";   /* Food and tobacco products */
   else naics3 = substr(naics_bea7,1,3);
 run;

 proc sort data=beacapital;
   by naics3 year;
 run;

 /* Read the BLS investment price deflator for all assets (census years
    only) and sort it by the capital merge key. */

 data bls_capital_FHS;
   set bls.BLS_capital_FHS;
   if not (year in (2002,2007)) then delete;
 run;

 proc sort data = bls_capital_FHS
           out  = bls_capital_FHS (keep = naics3 year piinv_base02);
   by naics3 year;
 run;

%macro create_real_capital(ind);

 /* Add the real capital stock k and its log lk to WORK.<ind>_real.
      ind = industry sample name (e.g. sugarf); WORK.<ind>_real must
            already carry naics3 and year.
    Uses the WORK datasets beacapital (nkcst, gkhst) and bls_capital_FHS
    (piinv_base02), both merged by naics3 and year. */

 proc sort data=&ind._real; by naics3 year; run;

 /* BEA merge: every plant is kept, whether or not a BEA row matches. */
 data &ind._real;
  merge &ind._real (in=infhs) beacapital;
  by naics3 year;
  if infhs;
 run;

 /* BLS merge: only plants whose naics3-year has a BLS row are kept.
    NOTE(review): this is stricter than the BEA merge above; confirm that
    dropping unmatched plants is intended. */
 data &ind._real;
  merge &ind._real (in=infhs) bls_capital_FHS (in=inbls);
  by naics3 year;
  if infhs and inbls;
 run;

  /* Here I'm using the historical vs. current BEA data for structures
    for total capital assets.  The CMF no longer breaks out book values of 
    assets by equipment and structures. 
    To convert this to 2002 dollars, I am using the BLS investment price
    deflator for "all assets". */
 
 data &ind._real;
  set &ind._real;
    /*  k=tae*(nkcst/gkhst)/piinv_base02; */
      k=tab*(nkcst/gkhst)/piinv_base02;
      if k>0 then lk = log(k);
      /* Explicit reset, as for the other log variables in this program:
         an lk value already present in the input must not survive when
         k is zero, negative or missing. */
      else lk = .;
 run;

%mend create_real_capital;

/* Run create_real_capital for every FHS industry, in the listed order. */
%macro all_create_real_capital;
  %local inds n;
  %let inds = sugarf icef plywoodf gasf floorf coffeef carbonf breadf boxesf concf;
  %let n = 1;
  %do %while (%length(%scan(&inds, &n)) > 0);
    %create_real_capital(%scan(&inds, &n));
    %let n = %eval(&n + 1);
  %end;
%mend all_create_real_capital;

%all_create_real_capital;

  
/****
***** PART 4: Merge the entry/exit/continuer dummies with the FHS dataset. 
*****/

%macro merge_entryexit(ind);

  /* Attach the variables of fhs.bcd0207 to WORK.<ind>_real by survu_id.
       ind = industry sample name (e.g. sugarf)
     Writes WORK.<ind>_real_w_bcd with every plant of <ind>_real, matched
     or not; te02 and te07 are dropped from the result.
     fhs.bcd0207 is merged as stored, so it is assumed to be sorted by
     survu_id already. */

  proc sort data=&ind._real;
    by survu_id;
  run;

  data &ind._real_w_bcd (drop=te02 te07);
    merge &ind._real  (in=infhs)
          fhs.bcd0207 (in=inbcd);
    by survu_id;
    if not infhs then delete;
  run;

%mend merge_entryexit;

/* Run merge_entryexit for every FHS industry, in the listed order. */
%macro all_merge_entryexit;
  %local inds n;
  %let inds = sugarf icef plywoodf gasf floorf coffeef carbonf breadf boxesf concf;
  %let n = 1;
  %do %while (%length(%scan(&inds, &n)) > 0);
    %merge_entryexit(%scan(&inds, &n));
    %let n = %eval(&n + 1);
  %end;
%mend all_merge_entryexit;

%all_merge_entryexit;


/***
**** PART 5: Merge in the industry cost shares ;
***/

/* Copy the industry cost shares into WORK with the numeric NAICS code
   replaced by a zero-padded 6-character code.  DROP= is applied before
   RENAME=, so the numeric naics is removed and naics_char is stored
   under the name naics. */
data cost_shares_FHS (drop=naics rename=(naics_char=naics));
  set indcs.cost_shares_FHS;
  naics_char = put(naics, z6.);
run;

proc sort data=cost_shares_FHS;
  by naics year;
run;

%macro merge_cs(ind);

  /* Attach the industry cost shares to WORK.<ind>_real_w_bcd by naics
     and year.
       ind = industry sample name (e.g. sugarf)
     Writes WORK.<ind>_w_cs with every plant of <ind>_real_w_bcd, whether
     or not a cost-share row matches.  The cost-share dataset stays first
     in the MERGE statement. */

  proc sort data=&ind._real_w_bcd;
    by naics year;
  run;

  data &ind._w_cs;
    merge cost_shares_FHS   (in=incs)
          &ind._real_w_bcd  (in=infhs);
    by naics year;
    if not infhs then delete;
  run;

%mend merge_cs;

/* Run merge_cs for every FHS industry, in the listed order. */
%macro all_merge_cs;
  %local inds n;
  %let inds = sugarf icef plywoodf gasf floorf coffeef carbonf breadf boxesf concf;
  %let n = 1;
  %do %while (%length(%scan(&inds, &n)) > 0);
    %merge_cs(%scan(&inds, &n));
    %let n = %eval(&n + 1);
  %end;
%mend all_merge_cs;

%all_merge_cs;

/***
*** PART 6: Identify imputed items;
***/

%macro id_imputes(ind);

 /* Flag imputed items for one industry and write the final dataset.
      ind = industry sample name (e.g. sugarf)
    Step 1 reads WORK.<ind>_w_cs and writes WORK.<ind>_w_imp_fl with 0/1
           indicators (tvs_imp, cm_imp, cp_imp, ee_imp, cf_imp, ph_imp,
           pw_imp, ww_imp, te_imp, tab_imp, tae_imp) derived from the
           corresponding _F flag variables.  A value in the listed code
           set gives 1, anything else gives 0.
    Step 2 writes fhs.phy<ind>, adding pqs_imp and pv_imp.
    Step 3 attaches variable labels to fhs.phy<ind>.
    NOTE(review): the meaning of the individual flag codes is not visible
    in this file; confirm the code lists against the CMF documentation. */

 data &ind._w_imp_fl;
  set &ind._w_cs;
 if TVS_F in (' B',' H',' L',' M',' J',' S',' V','RB','RE','RH','RL','RM','RJ','RS','RV') 
 then tvs_imp = 1;
 else tvs_imp = 0;

 if cm_F in (' B',' H',' L',' M',' S',' V','RB','RE','RH','RL','RM','RS')
 then cm_imp = 1;
 else cm_imp = 0;

 if cp_F in (' D',' H',' J',' L',' T',' V', 'RH','RJ','RL','RK','RT','RV')
 then cp_imp = 1;
 else cp_imp = 0;

 if ee_f in (' H',' V','RH','RV') 
 then ee_imp = 1;
 else ee_imp = 0;

 if cf_f in (' H',' V','RH','RV') 
 then cf_imp = 1;
 else cf_imp = 0;

 /* PH uses a year-specific code list: one list for 2002, another for
    every other year. */
 if year=2002
 then do;
   if PH_F in (' B',' H',' M'/*,'RS'*/) 
   THEN ph_imp = 1;
   else ph_imp = 0;

 /* 2002 rule for the asset variables: TAB and TAE are flagged together,
    and only when both flags equal ' K'. */

   if TAB_F = ' K' and TAE_F=' K' 
   then do;
     tab_imp = 1;
     tae_imp = 1;
   end;
   else do;
     tab_imp = 0;
     tae_imp = 0;
   end;

 end;
 else do;
   if PH_F in (' B',' H',' M','RB','RH','RL') 
   THEN ph_imp = 1;
   else ph_imp = 0;
 end;

 /* Make PH missing when TVS>0 and (PW>0 or WW>0 or TE>0) and PH=0.
    In 2002 the PH_F flag is missing in many cases.  These PH=0 cases are
    probably either imputed or misreported.  This applies to all years and
    runs after ph_imp has been set, so ph_imp is not affected by it. */
 if tvs>0 and ph=0 and (pw>0 or ww>0 or TE>0) then ph= . ;

 if PW_F IN (' B',' H',' J',' L',' M',' V','RB','RE','RH','RJ','RL','RM','RV') 
 THEN pw_imp = 1;
 else pw_imp = 0;

 if ww_f in (' B',' H',' M') 
 then ww_imp = 1;
 else ww_imp = 0;

 if te_f in (' B',' H','RB') 
 then te_imp = 1;
 else te_imp = 0; 

 /* 2007 rule for the asset variables: TAB and TAE are flagged separately.
    For years other than 2002 and 2007 this step assigns neither flag. */
 if year=2007 then do;
    if TAB_F in ( ' B', ' D',' J',' V')  
    then tab_imp = 1;
    else tab_imp = 0;
    if TAE_F in ( ' B', ' D',' J',' V')  
    then tae_imp = 1;
    else tae_imp = 0;
 end;



run;

/** If ALL of the key variables are zero or imputed,
    then delete the plant, to be consistent with 
    how we select the sample for CART-imputes.
    (The delete statement that did this is commented out below, so no
    plant is currently removed.)
***/

  proc contents data=&ind._w_imp_fl;
  run;

  data fhs.phy&ind;
   set &ind._w_imp_fl;
   /* pqs_valimpr, pv_valimpr, pqs and pv are read from the input as is;
      presumably they come from the product trailer data -- TODO confirm.
      NOTE(review): there is no else branch, so pqs_imp and pv_imp are
      only ever set to 1 here, unlike the 0/1 flags built above. */
   if pqs_valimpr > 0.50 then pqs_imp = 1;
   if pv_valimpr > 0.50 then pv_imp = 1;
   /* Flag PQS as imputed if PV is positive but PQS is zero; PQS itself is
      left unchanged. */
   if pv>0 and pqs = 0 then pqs_imp = 1;
/**   if tvs_imp=1 and pqs_imp=1 and pv_imp=1 and cm_imp=1 and cf_imp=1 and ee_imp=1 and tab_imp=1 and sw=. and ph_imp=1 and ww_imp=1 and pw_imp=1 then delete;
**/
  run;

  /* Label the constructed variables in the permanent dataset. */
  proc datasets library=fhs nolist;
   modify phy&ind;
    label k = 'Real capital stock, 2002 $K, adjusted from book values';
    label lk = 'Log of real capital stock, 2002 $K, adjusted from book values';
    label e = 'Real energy costs, 2002 $K, deflated sum of cost of fuels and cost of purchased electricity';
    label le = 'Log of real energy costs, 2002 $K';
    label matq = 'Real cost of materials, 2002 $K, (without energy)';
    label lm = 'Log of real cost of materials (without energy), 2002 $K';
    label q = 'Real total value of shipments, 2002 $K';
    label lnq = 'Log of real total value of shipments, 2002 $K';
    label lth = 'Log of total (production-worker-equivalent) hours';
    label pwhours = 'Total production-worker-equivalent hours'; 
    label year = 'year';
  run;

%mend id_imputes;


/* Flag imputed items and write the final fhs.phy<industry> dataset for
   each industry. */
%id_imputes(sugarf);
%id_imputes(icef);
%id_imputes(plywoodf);
%id_imputes(gasf);
%id_imputes(floorf);
%id_imputes(coffeef);
%id_imputes(carbonf);
%id_imputes(breadf);
%id_imputes(boxesf);

/* The concf call is disabled.  It was previously "commented out" with a
   leading asterisk, but the macro processor still expands a macro call
   inside an asterisk-style comment: only the first generated statement
   was swallowed and the rest ran out of order.  A block comment really
   suppresses the call.
   NOTE(review): the file header still lists fhs.phyconcf as an output;
   re-enable the call below if that dataset is required.

%id_imputes(concf);
*/
